1 Load packages and dataset

#package
library(SmartPhos)
library(PhosR)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(DEP)
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse()   masks IRanges::collapse()
## ✖ dplyr::combine()    masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()      masks matrixStats::count()
## ✖ dplyr::desc()       masks IRanges::desc()
## ✖ tidyr::expand()     masks S4Vectors::expand()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks S4Vectors::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()     masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()     masks S4Vectors::rename()
## ✖ dplyr::slice()      masks IRanges::slice()

2 Create input file table

rawFolder <- rawFolder <- system.file("example/rawData", package = "SmartPhos")
fileTable <- generateInputTable(rawFolder)

Check if the table is correct

fileTable
##                                                                                                                     fileName
## 1       /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/100ug_txt/proteinGroups.txt
## 2       /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/100ug_txt/proteinGroups.txt
## 3  /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/100ug_txt/Phospho (STY)Sites.txt
## 4  /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/100ug_txt/Phospho (STY)Sites.txt
## 5       /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/200ug_txt/proteinGroups.txt
## 6       /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/200ug_txt/proteinGroups.txt
## 7  /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/200ug_txt/Phospho (STY)Sites.txt
## 8  /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/200ug_txt/Phospho (STY)Sites.txt
## 9       /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/400ug_txt/proteinGroups.txt
## 10      /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/400ug_txt/proteinGroups.txt
## 11 /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/400ug_txt/Phospho (STY)Sites.txt
## 12 /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/400ug_txt/Phospho (STY)Sites.txt
## 13       /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/50ug_txt/proteinGroups.txt
## 14       /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/50ug_txt/proteinGroups.txt
## 15  /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/50ug_txt/Phospho (STY)Sites.txt
## 16  /Library/Frameworks/R.framework/Versions/4.2/Resources/library/SmartPhos/example/rawData/50ug_txt/Phospho (STY)Sites.txt
##           sample            type     batch                      id
## 1       FP_100ug        proteome 100ug_txt      100ug_txt_FP_100ug
## 2  Phospho_100ug        proteome 100ug_txt 100ug_txt_Phospho_100ug
## 3       FP_100ug phosphoproteome 100ug_txt      100ug_txt_FP_100ug
## 4  Phospho_100ug phosphoproteome 100ug_txt 100ug_txt_Phospho_100ug
## 5       FP_200ug        proteome 200ug_txt      200ug_txt_FP_200ug
## 6  Phospho_200ug        proteome 200ug_txt 200ug_txt_Phospho_200ug
## 7       FP_200ug phosphoproteome 200ug_txt      200ug_txt_FP_200ug
## 8  Phospho_200ug phosphoproteome 200ug_txt 200ug_txt_Phospho_200ug
## 9       FP_400ug        proteome 400ug_txt      400ug_txt_FP_400ug
## 10 Phospho_400ug        proteome 400ug_txt 400ug_txt_Phospho_400ug
## 11      FP_400ug phosphoproteome 400ug_txt      400ug_txt_FP_400ug
## 12 Phospho_400ug phosphoproteome 400ug_txt 400ug_txt_Phospho_400ug
## 13       FP_50ug        proteome  50ug_txt        50ug_txt_FP_50ug
## 14  Phospho_50ug        proteome  50ug_txt   50ug_txt_Phospho_50ug
## 15       FP_50ug phosphoproteome  50ug_txt        50ug_txt_FP_50ug
## 16  Phospho_50ug phosphoproteome  50ug_txt   50ug_txt_Phospho_50ug

Add a new column indicate concentration and sample type (FP or Phospho)

fileTable$conc <- as.numeric(str_remove(fileTable$batch,"ug_txt"))
fileTable$sampleType <- str_extract(fileTable$sample,"FP|Phospho")

3 Parse the whole experiment using the readExperiment function from SmartPhos

testData <- readExperiment(fileTable, annotation_col = c("conc","sampleType"))
## [1] "Processing phosphoproteomic data"
## [1] "Processing proteomic data"

Check the data

testData
## A MultiAssayExperiment object of 2 listed
##  experiments with user-defined names and respective classes.
##  Containing an ExperimentList class object of length 2:
##  [1] Phosphoproteome: SummarizedExperiment with 10357 rows and 8 columns
##  [2] Proteome: SummarizedExperiment with 4406 rows and 8 columns
## Functionality:
##  experiments() - obtain the ExperimentList instance
##  colData() - the primary/phenotype DataFrame
##  sampleMap() - the sample coordination DataFrame
##  `$`, `[`, `[[` - extract colData columns, subset, or experiment
##  *Format() - convert into a long or wide DataFrame
##  assays() - convert ExperimentList to a SimpleList of matrices
##  exportClass() - save data to flat files

4 Explore the imported data set

4.1 Check phosphoproteome data

Subset for phosphoproteomic data

ppe <- testData[["Phosphoproteome"]]
colData(ppe) <- colData(testData)

4.1.1 Examin the data distrubution

countMat <- assay(ppe)

4.1.2 Missing value per sample

plotTab <- tibble(sample = ppe$sample, 
                  perNA = colSums(is.na(countMat))/nrow(countMat))
ggplot(plotTab, aes(x=sample, y=1-perNA)) +
    geom_bar(stat = "identity") +
    ylab("completeness") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0))

4.1.3 For paired FP and Phospho samples, if a peptide is detected in FP sample, what is the chance it can also be detected in phospho sample

sumTab <- lapply(unique(ppe$batch), function(condi) {
    subMat <- countMat[,ppe$batch == condi]
    subMat <- subMat[!is.na(subMat[,grepl("FP",colnames(subMat))]),]
    resTab <- tibble(nTotal = nrow(subMat),
                     nPhos = sum(!is.na(subMat[,(grepl("Phospho",colnames(subMat)))])),
                     batch = condi)
}) %>% bind_rows() %>%
    pivot_longer(-batch)

ggplot(sumTab, aes(x=batch, y=value, fill=name)) +
    geom_bar(stat = "identity", position = "stack") +
    ylab("Number of detection")

The FP sample should not have too much meaning here…

4.1.4 Remove FP sample in the Phosphoproteome experiment

ppePhos <- ppe[,ppe$sampleType != "FP"]
ppePhos <- ppePhos[rowSums(!is.na(assay(ppePhos)))>0,]
dim(ppePhos)
## [1] 10189     4

4.1.5 How many feature have unique protein mapping?

uniqueVal <- !str_detect(rowData(ppePhos)$UniprotID,";")
table(uniqueVal)
## uniqueVal
## FALSE  TRUE 
##   264  9925

4.1.6 Missing value per sample

countMat <- assay(ppePhos)
plotTab <- tibble(sample = ppePhos$sample, 
                  perNA = colSums(is.na(countMat))/nrow(countMat))
ggplot(plotTab, aes(x=sample, y=1-perNA)) +
    geom_bar(stat = "identity") +
    ylab("completeness") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0))

Plot a cumulative curve of missing value cut-off and remaining number of features

missRate <- tibble(id = rownames(countMat),
                   rate = rowSums(is.na(countMat))/ncol(countMat))
cumTab <- lapply(seq(0,1,0.05), function(cutRate) {
    tibble(cut= cutRate,
           per = sum(missRate$rate <= cutRate)/nrow(missRate))
} ) %>%
    bind_rows()
ggplot(cumTab, aes(x=cut,y=per)) +
    geom_line() +
    xlab("Allowed missing value rate") +
    ylab("Percentage of remaining features")

Missing value heatmap to check missing value structure (sample 1000 sites)

DEP::plot_missval(ppePhos[sample(seq(nrow(ppePhos)),1000),])

Rather random

Missing value pattern

ppeLog2 <- ppePhos
assay(ppeLog2) <- log2(assay(ppeLog2))
plot_detect(ppeLog2) 

Keep proteins detected in at least half of the sample (missing rate <= 0.5)

ppePhosFilt <- ppePhos[filter(missRate, rate <=0.5)$id,]
dim(ppePhosFilt)
## [1] 7295    4

4.1.7 Look at count table distribution

4.1.7.1 Raw scale

countMat <- assay(ppePhosFilt)
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    filter(!is.na(value)) %>%
    mutate(log2Val = log2(value))
ggplot(countTab, aes(x=name, y=log2Val)) +
    geom_boxplot() + geom_point()

4.1.7.2 Imputation and normalization

logMat <- log2(assay(ppePhosFilt))
#assays(ppePhosFilt)[["Quantification"]] <- logMat
logMat <- tImpute(logMat)
logMat <- medianScaling(logMat, scale = FALSE)
assays(ppePhosFilt)[["Quantification"]] <- logMat

4.1.7.3 Boxplot

countMat <- assays(ppePhosFilt)[["Quantification"]]
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    filter(!is.na(value))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot() + geom_point()

4.1.7.4 Mean versus variant

plotTab <- tibble(meanVal = rowMeans(countMat),
                  var = apply(countMat, 1, var))
ggplot(plotTab, aes(x=meanVal,y=var)) +
    geom_point()

4.1.7.5 Heatmap visualization

library(pheatmap)
#select top 1000 most variant
plotMat <- countMat[order(plotTab$var, decreasing = TRUE)[1:1000],]
pheatmap(plotMat, show_rownames = FALSE)

4.1.7.6 Sample similarity

#select top 1000 most variant
plotMat <- countMat[order(plotTab$var, decreasing = TRUE)[1:1000],]
pheatmap(as.matrix(dist(t(plotMat))))

4.1.7.7 Detect proteins that follows the trend

library(limma)
## 
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
testMat <- assays(ppePhosFilt)[["Quantification"]]
conc <- ppePhosFilt$conc
designMat <- model.matrix(~conc)
rowAnno <- rowData(ppePhosFilt)
fit <- lmFit(testMat, designMat)
fit2 <- eBayes(fit)
resTab <- topTable(fit2, number = Inf) %>%
    as_tibble(rownames = "id") %>%
    mutate(UniprotID = rowAnno[id,]$UniprotID,
           Gene = rowAnno[id,]$Gene,
           Site = rowAnno[id,]$Position,
           Residue = rowAnno[id,]$Residue)
## Removing intercept from test coefficients
hist(resTab$P.Value)

Plot top 9 associations

pList <- lapply(seq(9), function(i) {
    rec <- resTab[i,]
    plotTab <- tibble(expr = testMat[rec$id,],
                      conc = conc)
    ggplot(plotTab, aes(x=conc, y=expr)) +
        geom_point() + geom_smooth(method="lm") +
        ggtitle(sprintf("%s (%s%s)",rec$Gene, rec$Residue,rec$Site))
})
cowplot::plot_grid(plotlist = pList, nrow=3)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Table of significant associations

resTab %>% filter(adj.P.Val < 0.2) %>% mutate_if(is.numeric, formatC, digits=1) %>%
  DT::datatable()

4.2 Check full proteome

Subset for phosphoproteomic data

fpe <- testData[["Proteome"]]
colData(fpe) <- colData(testData)

4.2.1 Examin the data distrubution

countMat <- assay(fpe)

4.2.2 Missing value per sample

plotTab <- tibble(sample = fpe$sample, 
                  perNA = colSums(is.na(countMat))/nrow(countMat))
ggplot(plotTab, aes(x=sample, y=1-perNA)) +
    geom_bar(stat = "identity") +
    ylab("completeness") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0))

4.2.3 For paired FP and Phospho samples, if a peptide is detected in FP sample, what is the chance it can also be detected in phospho sample

sumTab <- lapply(unique(fpe$batch), function(condi) {
    subMat <- countMat[,fpe$batch == condi]
    subMat <- subMat[!is.na(subMat[,grepl("FP",colnames(subMat))]),]
    resTab <- tibble(nTotal = nrow(subMat),
                     nPhos = sum(!is.na(subMat[,(grepl("Phospho",colnames(subMat)))])),
                     batch = condi)
}) %>% bind_rows() %>%
    pivot_longer(-batch)

ggplot(sumTab, aes(x=batch, y=value, fill=name)) +
    geom_bar(stat = "identity", position = "stack") +
    ylab("Number of detection")

Less phospho proteome samples will be detect, which makes sense. As non-phosphorylated proteins will be lost during the enrichment process.

Missing value heatmap to check missing value structure

DEP::plot_missval(fpe[sample(seq(nrow(fpe)),1000),])

4.2.4 Look at full proteome only

fpeProt <- fpe[,fpe$sampleType == "FP"]
fpeProt <- fpeProt[rowSums(!is.na(assay(fpeProt)))>0,]
countMat <- assay(fpeProt)
dim(fpeProt)
## [1] 4298    4

4.2.5 How many feature have unique protein mapping?

uniqueVal <- !str_detect(rowData(fpeProt)$UniprotID,";")
table(uniqueVal)
## uniqueVal
## FALSE  TRUE 
##   539  3759

Plot a cumulative curve of missing value cut-off and remaining number of features

missRate <- tibble(id = rownames(countMat),
                   rate = rowSums(is.na(countMat))/ncol(countMat))
cumTab <- lapply(seq(0,1,0.05), function(cutRate) {
    tibble(cut= cutRate,
           per = sum(missRate$rate <= cutRate)/nrow(missRate))
} ) %>%
    bind_rows()
ggplot(cumTab, aes(x=cut,y=per)) +
    geom_line() +
    xlab("Allowed missing value rate") +
    ylab("Percentage of remaining features")

Keep proteins detected in at least half of the samples (missing rate <= 0.5)

fpeProtFilt <- fpeProt[filter(missRate, rate <=0.5)$id,]
dim(fpeProtFilt)
## [1] 3616    4

4.2.6 Look at count table distribution

4.2.6.1 Raw scale

countMat <- assay(fpeProtFilt)
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    filter(!is.na(value)) %>%
    mutate(log2Val = log2(value))
ggplot(countTab, aes(x=name, y=log2Val)) +
    geom_boxplot() + geom_point() +
    theme(axis.text.x = element_text(angle = 90, hjust=1, vjust=0.5))

4.2.7 Imputation and normalization

logMat <- log2(assay(fpeProtFilt))
#assays(ppePhosFilt)[["Quantification"]] <- logMat
logMat <- tImpute(logMat)
logMat <- medianScaling(logMat, scale = FALSE)
assays(fpeProtFilt)[["Quantification"]] <- logMat

4.2.7.1 Boxplot

countMat <- assays(fpeProtFilt)[["Quantification"]]
countTab <- countMat %>% as_tibble(rownames = "id") %>% 
    pivot_longer(-id) %>%
    filter(!is.na(value))
ggplot(countTab, aes(x=name, y=value)) +
    geom_boxplot() + geom_point()

4.2.7.2 Mean versus variant

plotTab <- tibble(meanVal = rowMeans(countMat),
                  var = apply(countMat, 1, var))
ggplot(plotTab, aes(x=meanVal,y=var)) +
    geom_point()

4.2.8 Heatmap visualization

library(pheatmap)
#select top 1000 most variant
plotMat <- countMat[order(plotTab$var, decreasing = TRUE)[1:1000],]
pheatmap(plotMat, show_rownames = FALSE)

4.2.9 Sample similarity

#select top 1000 most variant
plotMat <- countMat[order(plotTab$var, decreasing = TRUE)[1:1000],]
pheatmap(as.matrix(dist(t(plotMat))))

4.2.10 Detect proteins that follows the trend

library(limma)
testMat <- assays(fpeProtFilt)[["Quantification"]]
conc <- fpeProtFilt$conc
designMat <- model.matrix(~conc)
rowAnno <- rowData(fpeProtFilt)
fit <- lmFit(testMat, designMat)
fit2 <- eBayes(fit)
resTab <- topTable(fit2, number = Inf) %>%
    as_tibble(rownames = "id") %>%
    mutate(UniprotID = rowAnno[id,]$UniprotID,
           Gene = rowAnno[id,]$Gene)
## Removing intercept from test coefficients
hist(resTab$P.Value)

Plot top 9 associations

pList <- lapply(seq(9), function(i) {
    rec <- resTab[i,]
    plotTab <- tibble(expr = testMat[rec$id,],
                      conc = conc)
    ggplot(plotTab, aes(x=conc, y=expr)) +
        geom_point() + geom_smooth(method="lm") +
        ggtitle(sprintf("%s",rec$Gene))
})
cowplot::plot_grid(plotlist = pList, nrow=3)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

4.3 Compare phosphoproteomic measure and full proteomic measure

4.3.1 Overlap in the “Phos” samples

phosData <- testData[,testData$sampleType == "Phospho"]

sumTab <- lapply(unique(phosData$sample), function(n) {
    subPhos <- phosData[["Phosphoproteome"]][,phosData$sample==n]
    subProt <- phosData[["Proteome"]][,phosData$sample==n]
    subPhos <- subPhos[!is.na(assay(subPhos)[,1]),]
    subProt <- subProt[!is.na(assay(subProt)[,1]),]
    idPhos <- unique(rowData(subPhos)$Gene)
    idProt <- unique(rowData(subProt)$Gene)
    tibble(sample = n, overlap = length(intersect(idPhos, idProt)),
           onlyPhos = length(setdiff(idPhos, idProt)),
           onlyProt = length(setdiff(idProt, idPhos)))
}) %>% bind_rows() %>%
    pivot_longer(-sample)
ggplot(sumTab, aes(x=sample, y=value, fill = name)) +
    geom_bar(stat="identity")

4.3.2 Overlap between FP samples in proteomic measure and Phospho samples in phospho proteomic measure

sumTab <- lapply(unique(testData$batch), function(n) {
    subPhos <- testData[["Phosphoproteome"]][,testData$batch==n & testData$sampleType == "Phospho"]
    subProt <- testData[["Proteome"]][,testData$batch==n & testData$sampleType == "FP"]
    subPhos <- subPhos[!is.na(assay(subPhos)[,1]),]
    subProt <- subProt[!is.na(assay(subProt)[,1]),]
    idPhos <- unique(rowData(subPhos)$Gene)
    idProt <- unique(rowData(subProt)$Gene)
    tibble(sample = n, overlap = length(intersect(idPhos, idProt)),
           onlyPhos = length(setdiff(idPhos, idProt)),
           onlyProt = length(setdiff(idProt, idPhos)))
}) %>% bind_rows() %>%
    pivot_longer(-sample)
ggplot(sumTab, aes(x=sample, y=value, fill = name)) +
    geom_bar(stat="identity")

4.3.3 Test for differentially phosphorylated considering the baseline protein expression

Preprocess

phosData <- testData[["Phosphoproteome"]][, testData$sampleType == "Phospho"]
phosData <- phosData[rowSums(is.na(assay(phosData)))/ncol(phosData) <= 0.25,]
phosMat <- assay(phosData)
phosMat <- log2(phosMat)
phosMat <- medianScaling(phosMat)
assays(phosData)[["norm"]] <- phosMat

protData <- testData[["Proteome"]][, testData$sampleType == "FP"]
protData <- protData[rowSums(is.na(assay(protData)))/ncol(protData) <= 0.25,]
proMat <- assay(protData)
proMat <- log2(proMat)
proMat <- medianScaling(proMat)
assays(protData)[["norm"]] <- proMat

Get proteins detected both at proteome and phosphoproteome level

overlap <- intersect(rowData(phosData)$UniprotID, rowData(protData)$UniprotID)
idMap <- tibble(UniprotID = overlap) %>%
    mutate(idProt = rownames(protData[match(overlap, rowData(protData)$UniprotID),]))
rowData(phosData)$protID <- idMap[match(rowData(phosData)$UniprotID, idMap$UniprotID),]$idProt

phosDataSub <- phosData[!is.na(rowData(phosData)$protID),]

fullMat <- cbind(assays(phosDataSub)[["norm"]],assays(protData)[["norm"]][rowData(phosDataSub)$protID,])

Build design matrix

library(proDA)
designTab <- data.frame(row.names = colnames(fullMat),
                         type = rep(c("Phos","FP"),each=4),
                         conc = rep(c(100, 200, 400, 50), 2))
fit <- proDA(fullMat, design = ~ type*conc ,
             col_data = designTab)

Get results

contra <- "`typePhos:conc`"
rowAnno <- rowData(phosDataSub) %>%
    as_tibble(rownames = "name")
resTab <- test_diff(fit, contra) %>%
    left_join(rowAnno) %>%
    arrange(pval, by = "name")
## Joining, by = "name"
resTab.sig <- filter(resTab, pval < 0.01)

Plot top 9 interactions

pList <- lapply(seq(9), function(i) {
    rec <- resTab.sig[i,]
    plotTab <- designTab %>% as_tibble(rownames = "id") %>%
        mutate(exp = fullMat[rec$name,id])
    ggplot(plotTab, aes(x=factor(conc),y=exp, color = type, group = type)) +
        geom_point() + geom_line() +
        ggtitle(sprintf("%s (%s%s)", rec$Gene, rec$Residue, rec$Position)) +
        theme_bw()
})

cowplot::plot_grid(plotlist= pList, ncol=3)
## Warning: Removed 1 rows containing missing values (geom_point).
## Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).

Table of significant associations

resTab.sig %>% mutate_if(is.numeric, formatC, digits=1) %>%
  DT::datatable()